I. Setup

libraries needed:

library(base)
library(datasets)
library(Biobase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(Seurat)
## Attaching SeuratObject
library(CellChat)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## Loading required package: ggplot2
library(dplyr)
library(patchwork)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ tibble  3.1.8     ✔ purrr   1.0.1
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ dplyr::combine()        masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ igraph::groups()        masks dplyr::groups()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::Position()     masks BiocGenerics::Position(), base::Position()
## ✖ purrr::simplify()       masks igraph::simplify()
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
#library(liana)
library(ggplot2)
library(devtools)
## Loading required package: usethis

loading data and create Seurat object, and check out information of the object

mosk.data <- Read10X(data.dir = "/Users/xiaoyizheng/Downloads/Praktikum/Bioinfo_Einarbeiten/CellChatSelf/mouse/GSE113854_RAW/")
#mosk.data

mosk <- CreateSeuratObject(counts = mosk.data, project = "MoSk", min.cells = 3, min.features = 200)
mosk
## An object of class Seurat 
## 17090 features across 22322 samples within 1 assay 
## Active assay: RNA (17090 features, 0 variable features)

II. Pre-processing

to-do: quality control

mosk[["percent.mt"]] <- PercentageFeatureSet(mosk, pattern = "^MT-") #mitoc. genes -> Zeichen fuer schlechte Qualitaet
VlnPlot(mosk, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) #All cells have the same value of percent.mt.(maybe bc already filtered?)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.

head(mosk@meta.data, 5) #Show QC metrics for the first 5 cells
##                    orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGAATTCCC-1       MoSk       1475          783          0
## AAACCTGAGACCGGAT-1       MoSk       4148         1624          0
## AAACCTGAGACGCACA-1       MoSk       3136         1342          0
## AAACCTGAGATGTGGC-1       MoSk       3138         1323          0
## AAACCTGAGCTGCGAA-1       MoSk       4029         1721          0
mosk.data[c("Igf2", "Cck", "Pf4"), 1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
##   [[ suppressing 30 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
##                                                                   
## Igf2 . . . . . 1 . .  . . . . . .  . . . 1 . . . . . . . . . . . .
## Cck  . . . . . . . .  . . . . . .  . . . . . . . . . . . . . . . .
## Pf4  . . . . . . . . 11 . . . . . 25 . . 1 1 . 1 1 . 1 . . . . . 2
plot1 <- FeatureScatter(mosk, feature1 = "nCount_RNA", feature2 = "percent.mt") #since all have same mt quality...
## Warning in cor(x = data[, 1], y = data[, 2]): the standard deviation is zero
plot2 <- FeatureScatter(mosk, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") #positively correlated
plot1 + plot2

#filter "mosk"
mosk <- subset(mosk, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) #hmmm but mt for filtering unneccessary

III.normalization

mosk <- NormalizeData(mosk, normalization.method = "LogNormalize", scale.factor = 10000)

IV.feature selection

mosk <- FindVariableFeatures(mosk, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(mosk), 10) # Identify the 10 most highly variable genes
top10
##  [1] "Hbb-bs" "Hba-a1" "Ccl21a" "Hbb-bt" "Saa3"   "Acp5"   "Ccl5"   "S100a8"
##  [9] "Mmp9"   "S100a9"
plot1 <- VariableFeaturePlot(mosk)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2

V. Scaling

all.genes <- rownames(mosk)
mosk <- ScaleData(mosk, features = all.genes)
## Centering and scaling data matrix

VI. Linear Dimension Reduction

mosk <- RunPCA(mosk, features = VariableFeatures(object = mosk))
## PC_ 1 
## Positive:  Col1a1, Col1a2, Col3a1, Sparc, Bgn, Col5a2, Dcn, Meg3, Postn, Col6a1 
##     Col6a2, Fstl1, Col12a1, Col5a1, Aebp1, Lum, Serpinh1, Mfap4, Col6a3, Mmp2 
##     Thbs2, Gpx3, Fbln2, Serpinf1, Tmsb10, Fn1, Lgals1, Lrrc15, Itm2a, Timp2 
## Negative:  Fth1, Tyrobp, Fcer1g, Lyz2, Ftl1, Tmsb4x, Ctss, H2-D1, C1qb, Apoe 
##     C1qa, C1qc, Cd52, Wfdc17, Ptpn18, Pf4, Lgals3, Laptm5, Ms4a7, Aif1 
##     Cd74, Clec4n, Ucp2, Lst1, Ms4a6c, Cd68, Id2, Alox5ap, Cd14, Fxyd5 
## PC_ 2 
## Positive:  Col4a1, Col4a2, Cdh5, Igfbp7, Pecam1, Egfl7, Sparcl1, Crip2, Plvap, Ramp2 
##     Col15a1, Col18a1, Cav1, Ctla2a, Emcn, Myct1, Adgrf5, Adgrl4, Kdr, Cd93 
##     Esam, Mcam, Ly6c1, Pdlim1, Fabp4, Cldn5, Mmrn2, Gng11, Tinagl1, Flt1 
## Negative:  Tyrobp, Fcer1g, Lyz2, Lgals3, C1qb, Ctss, Apoe, Ftl1, C1qa, C1qc 
##     Fth1, Pf4, Wfdc17, Ms4a7, Lst1, Cd52, Aif1, Lgals1, Cstb, Cd74 
##     Laptm5, Clec4n, Id2, Ifi27l2a, Cd68, Ms4a6c, Alox5ap, Cd14, Csf1r, Cyba 
## PC_ 3 
## Positive:  Rgs5, Ndufa4l2, Gm13889, Higd1b, Des, Notch3, Serpine2, Cox4i2, Il6, Acta2 
##     Mylk, Rasgrp2, Cygb, Thy1, Ebf1, Ppp1r14a, Crip1, Mgp, Col4a1, Mustn1 
##     Pdgfa, Myl9, S100a4, Phlda1, Abcc9, Tppp3, Col4a2, Gucy1a3, Parm1, Actg2 
## Negative:  Cdh5, Pecam1, Ctla2a, Egfl7, Cd93, Ramp2, Cd34, Ly6c1, Myct1, Kdr 
##     S100a16, Mfap4, Adgrl4, Emcn, Igf1, Mmrn2, Lum, Cldn5, Igfbp3, Ly6a 
##     Mest, Plvap, Ptprb, Ptn, Gpihbp1, Tie1, Fbln2, Gja1, Aebp1, Icam2 
## PC_ 4 
## Positive:  Malat1, Il6, Cxcl1, Meg3, Serping1, Mt1, Gm13889, Rgs5, Col14a1, Errfi1 
##     Sparcl1, Ogn, Eln, Neat1, Ccl2, Mt2, Klf4, Nov, Clec3b, Fmod 
##     Col4a1, Ebf1, Procr, Fosb, H19, Id3, Col4a2, Dnajb1, Fxyd1, Cygb 
## Negative:  2810417H13Rik, Birc5, Stmn1, Ube2c, Top2a, Tuba1b, Hmgb2, Cenpa, Cdca3, H2afz 
##     Cks2, Ccnb2, Prc1, Cks1b, Ccna2, 2700094K13Rik, Cenpf, Cdca8, Smc2, Cdk1 
##     Ccnb1, Lockd, Mki67, Nusap1, Tpx2, Spc24, Tubb5, Tk1, H2afx, Cenpm 
## PC_ 5 
## Positive:  Pf4, Apoe, Sepp1, C1qc, C1qb, C1qa, Cxcl1, Ctsd, Ms4a7, Neat1 
##     Lyz2, Lgmn, Fosb, Gas6, Klf4, Syngr1, Cxcl2, Ier3, Mrc1, Ccl7 
##     H19, Ftl1, Trem2, Ccl2, Col14a1, Igfbp4, Lst1, Top2a, Igf1, Slc40a1 
## Negative:  Tmsb10, S100a6, H2-Eb1, Ccr7, Samsn1, H2-Aa, Ramp3, H2-Ab1, Crabp1, Klrd1 
##     Bcl2a1d, Tbc1d4, Tmsb4x, Icos, Ifitm1, Napsa, Ptprcap, Cd209a, Cytip, Cd52 
##     H2-DMb1, Tnfrsf18, H2-DMa, AW112010, Cd74, Il1r2, Cd7, Cd3d, Tnfrsf4, Tnfrsf9
print(mosk[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  Col1a1, Col1a2, Col3a1, Sparc, Bgn 
## Negative:  Fth1, Tyrobp, Fcer1g, Lyz2, Ftl1 
## PC_ 2 
## Positive:  Col4a1, Col4a2, Cdh5, Igfbp7, Pecam1 
## Negative:  Tyrobp, Fcer1g, Lyz2, Lgals3, C1qb 
## PC_ 3 
## Positive:  Rgs5, Ndufa4l2, Gm13889, Higd1b, Des 
## Negative:  Cdh5, Pecam1, Ctla2a, Egfl7, Cd93 
## PC_ 4 
## Positive:  Malat1, Il6, Cxcl1, Meg3, Serping1 
## Negative:  2810417H13Rik, Birc5, Stmn1, Ube2c, Top2a 
## PC_ 5 
## Positive:  Pf4, Apoe, Sepp1, C1qc, C1qb 
## Negative:  Tmsb10, S100a6, H2-Eb1, Ccr7, Samsn1
VizDimLoadings(mosk, dims = 1:2, reduction = "pca")

DimPlot(mosk, reduction = "pca")

#DimPlot(mosk, reduction = "pca",label = TRUE)
DimHeatmap(mosk, dims = 1, cells = 500, balanced = TRUE) #heatmap: exploration of the primary sources of heterogeneity

DimHeatmap(mosk, dims = 1:15, cells = 500, balanced = TRUE)

VII. Determination of Dimension

mosk <- JackStraw(mosk, num.replicate = 100)
mosk <- ScoreJackStraw(mosk, dims = 1:20)
JackStrawPlot(mosk, dims = 1:15) 
## Warning: Removed 21000 rows containing missing values (`geom_point()`).

ElbowPlot(mosk)

VIII. Clustering Cells

mosk <- FindNeighbors(mosk, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
mosk <- FindClusters(mosk, resolution = 0.3) #13 clusters aus paper #res = 0.3 cluster 0->12
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 22172
## Number of edges: 707614
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9307
## Number of communities: 13
## Elapsed time: 4 seconds
head(Idents(mosk), 5)
## AAACCTGAGAATTCCC-1 AAACCTGAGACCGGAT-1 AAACCTGAGACGCACA-1 AAACCTGAGATGTGGC-1 
##                  3                  9                  8                  0 
## AAACCTGAGCTGCGAA-1 
##                  6 
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12

IX. Non-linear Dimension Reduction

mosk <- RunUMAP(mosk, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 09:24:17 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:24:17 Read 22172 rows and found 10 numeric columns
## 09:24:17 Using Annoy for neighbor search, n_neighbors = 30
## 09:24:17 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:24:19 Writing NN index file to temp file /var/folders/w_/82bj411144v93vqxt1tbsg9h0000gn/T//RtmpxwQyaP/filed497843b666
## 09:24:19 Searching Annoy index using 1 thread, search_k = 3000
## 09:24:27 Annoy recall = 100%
## 09:24:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:24:29 Initializing from normalized Laplacian + noise (using RSpectra)
## 09:24:30 Commencing optimization for 200 epochs, with 907130 positive edges
## 09:24:42 Optimization finished
DimPlot(mosk, reduction = "umap", label = TRUE)

##saveRDS(mosk, file = "/Users/xiaoyizheng/Downloads/Bioinfo_Einarbeiten/moskOutput.rds") 

X. Cluster Biomarkers

###retrived from CellChat paper summplm.

“Crabp1”, “Des”, “Rgs5”, “C1qb”, “Pf4”, “Eln”, “Ogn”, “Cd93”, “Pecam1”,“Birc5”, “Ccnb2”, “Icos”, “Nkg7”, “Ccr7”, “H2-DMb1”, “Hdc”, “G0s2”, “Cadm4”, “Itih5”, “Hba-a2”, “Hbb-bs”, “Acp5”, “Mmp9”, “Ccl21a”, “Lyve1”

#FIB2:
VlnPlot(mosk, features = c("Des", "Rgs5"), sort = 'decreasing')

#MYL:
VlnPlot(mosk, features = c("C1qb", "Pf4"), sort = 'decreasing')

#FIB-3:
VlnPlot(mosk, features = c("Eln", "Ogn"), sort = 'decreasing')

#ENDO:
VlnPlot(mosk, features = c("Cd93", "Pecam1"), sort = 'decreasing')

#FIB-4:
VlnPlot(mosk, features = c("Birc5", "Ccnb2"), sort = 'decreasing')

#TC:
VlnPlot(mosk, features = c("Icos", "Nkg7"), sort = 'decreasing')

#BC:
VlnPlot(mosk, features = c("Ccr7", "H2-DMb1"), sort = 'decreasing')

#FIB-5:
VlnPlot(mosk, features = c("Hdc",  "G0s2"), sort = 'decreasing')

#SCH:
VlnPlot(mosk, features = c("Cadm4", "Itih5"), sort = 'decreasing') + labs(title = "SCH")

#RBC:
VlnPlot(mosk, features = c("Hba-a2", "Hbb-bs"), sort = 'decreasing')

#DEN:
VlnPlot(mosk, features = c("Acp5", "Mmp9"), sort = 'decreasing')

#LYME:
VlnPlot(mosk, features = c("Ccl21a", "Lyve1"), sort = 'decreasing')

method to retrieve markers according to Seurat tutorial:

#mosk.markers <- FindAllMarkers(mosk, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.2) #0.2 is okki
#mosk.markers %>%
#  group_by(cluster) %>%
#  slice_max(n = 2, order_by = avg_log2FC)

heatmap with top 20 markers found by alt. method above, instead of from CellChat paper

#mosk.markers %>%
#  group_by(cluster) %>%
#  top_n(n = 10, wt = avg_log2FC) -> top10
#DoHeatmap(mosk, features = top10$gene) 
#+ NoLegend()

feature plots for validation

#all in one:
#FeaturePlot(mosk, features = c("Crabp1", "Des", "Rgs5", "C1qb", "Pf4", "Eln", "Ogn", "Cd93", "Pecam1","Birc5", "Ccnb2", "Icos", "Nkg7", "Ccr7", "H2-DMb1", "Hdc",  "G0s2", "Cadm4", "Itih5", "Hba-a2", "Hbb-bs", "Acp5", "Mmp9", "Ccl21a", "Lyve1"))
FeaturePlot(mosk, features = "Crabp1")

FeaturePlot(mosk, features = c("Des", "Rgs5", "C1qb", "Pf4", "Eln", "Ogn", "Cd93", "Pecam1","Birc5", "Ccnb2", "Icos", "Nkg7"))

FeaturePlot(mosk, features = c("Ccr7", "H2-DMb1", "Hdc",  "G0s2", "Cadm4", "Itih5", "Hba-a2", "Hbb-bs", "Acp5", "Mmp9", "Ccl21a", "Lyve1"))

XI. Assigning Cell Type Identity to Clusters

new.cluster.ids <- c("FIB-1", "FIB-3", "MYL", "FIB-2", "BC", "LYME", "FIB-4", "RBC", "ENDO", "TC", "FIB-5", "SCH", "DEN")
names(new.cluster.ids) <- levels(mosk)
mosk <- RenameIdents(mosk, new.cluster.ids)
DimPlot(mosk, reduction = "umap", label = TRUE, pt.size = 0.5)

dot plot for a better overview:

markers.to.plot <- c("Crabp1", 
                     "Hba-a2", "Hbb-bs",
                     "Eln", "Ogn",
                     "Ccr7", "H2-DMb1",
                     "C1qb", "Pf4",
                     "Des", "Rgs5",
                     "Cd93", "Pecam1",
                     "Ccl21a", "Lyve1",
                     "Birc5", "Ccnb2",
                     "Nkg7",
                     "Hdc",  "G0s2",
                     "Acp5", "Mmp9")


DotPlot(
  mosk,
  assay = NULL,
  features = markers.to.plot,
  cols = c("lightgrey", "blue"),
  col.min = -2.5,
  col.max = 2.5,
  dot.min = 0,
  dot.scale = 6,
  idents = NULL,
  group.by = NULL,
  split.by = NULL,
  cluster.idents = FALSE,
  scale = TRUE,
  scale.by = "radius",
  scale.min = NA,
  scale.max = NA
) + RotatedAxis()                   

XII. Identify significant interaction using LIANA

installing LIANA

if (!requireNamespace(“BiocManager”, quietly = TRUE)) install.packages(“BiocManager”)

if (!requireNamespace(“remotes”, quietly = TRUE)) install.packages(“remotes”)

remotes::install_github(‘saezlab/liana’, force = TRUE) ###devtools::install_github(‘saezlab/liana’, force = TRUE)

library(liana) liana_path <- system.file(package = “liana”) liana_test <- liana_wrap(mosk, resource = c(“Consensus”))

Liana returns a list of results, each element of which corresponds to a method => we need result through CellPhone

liana_test %>% dplyr::glimpse() ###=> list of 5: results through natmi, connectome, logfc, sca, cellphonedb ###extracting results only those via CellPhoneDB liana_test_cpdb <- liana_test$cellphonedb liana_test_cpdb CrossTalkR_input <- select(liana_test_cpdb, source, ligand, target, receptor, lr.mean) write.csv(CrossTalkR_input, file=“/Users/xiaoyizheng/Downloads/Praktikum/mosk12_liana_res.csv”)

###we probably dont want to aggregate since we only need for now from CellPhoneDB right? ###liana_test <- liana_test %>% liana_aggregate() ###dplyr::glimpse(liana_test) ###liana_test %>% ### liana_dotplot(source_groups = c(“FIB-1”), ### target_groups = c(“FIB-2”, “FIB-3”, “FIB-4”), ### ntop = 20)

XIII. CrossTalkR

install_github(“hadley/devtools”) install.packages(“devtools”) devtools::install_github(“https://github.com/CostaLab/CrossTalkeR”, force = TRUE, build_vignettes = TRUE) require(CrossTalkeR)

suppressPackageStartupMessages({require(CrossTalkeR)}) suppressPackageStartupMessages({require(igraph)}) suppressPackageStartupMessages({require(ggraph)}) suppressPackageStartupMessages({require(ggplot2)})

paths <- c(‘mosk12’ = system.file(“/Users/xiaoyizheng/Downloads/Praktikum”, “mosk12_liana_res.csv”, package = “CrossTalkeR”)) ###why system.file() here? it returns paths, but cant we just directly use path or something?

genes <- c(‘Hba-a2’) data <- generate_report(paths, genes, out_path=paste0(getwd(),‘/html/’), threshold=0, out_file = ‘mosk12_test.html’, output_fmt = “html_document”, report = TRUE)